{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Loopy: Controlling data layout\n",
    "\n",
    "## Setup code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pyopencl as cl\n",
    "import pyopencl.array\n",
    "import pyopencl.clrandom\n",
    "import loopy as lp\n",
    "\n",
    "from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Choose platform:\n",
      "[0] <pyopencl.Platform 'Portable Computing Language' at 0x7fd0ffe3c6e8>\n",
      "[1] <pyopencl.Platform 'Intel(R) OpenCL' at 0x33094e8>\n"
     ]
    },
    {
     "name": "stdin",
     "output_type": "stream",
     "text": [
      "Choice [0]: 0\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Set the environment variable PYOPENCL_CTX='0' to avoid being asked again.\n"
     ]
    }
   ],
   "source": [
    "ctx = cl.create_some_context(interactive=True)\n",
    "queue = cl.CommandQueue(ctx)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## A kernel on a structured array"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "knl = lp.make_kernel(\n",
    "    \"{[el,dof, comp]: \"\n",
    "        \"0<=el<nels \"\n",
    "        \"and 0<=dof<14 \"\n",
    "        \"and 0<=comp < 3}\",\n",
    "    \"D[el, dof, comp] = eps[el] * E[el, dof, comp]\")\n",
    "\n",
    "knl = lp.set_options(knl, write_cl=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "eps = np.random.randn(500)\n",
    "E = cl.clrandom.rand(queue, (500, 14, 3), dtype=np.float64)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[36m#\u001b[39;49;00m\u001b[36mdefine lid(N) ((int) get_local_id(N))\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
      "\u001b[36m#\u001b[39;49;00m\u001b[36mdefine gid(N) ((int) get_group_id(N))\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
      "\u001b[36m#\u001b[39;49;00m\u001b[36mif __OPENCL_C_VERSION__ < 120\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
      "\u001b[36m#\u001b[39;49;00m\u001b[36mpragma OPENCL EXTENSION cl_khr_fp64: enable\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
      "\u001b[36m#\u001b[39;49;00m\u001b[36mendif\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
      "\n",
      "__kernel \u001b[36mvoid\u001b[39;49;00m \u001b[32m__attribute__\u001b[39;49;00m ((reqd_work_group_size(\u001b[34m1\u001b[39;49;00m, \u001b[34m1\u001b[39;49;00m, \u001b[34m1\u001b[39;49;00m))) loopy_kernel(__global \u001b[36mdouble\u001b[39;49;00m *__restrict__ D, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ E, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ eps, \u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m nels)\n",
      "{\n",
      "  \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m el = \u001b[34m0\u001b[39;49;00m; el <= -\u001b[34m1\u001b[39;49;00m + nels; ++el)\n",
      "    \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m dof = \u001b[34m0\u001b[39;49;00m; dof <= \u001b[34m13\u001b[39;49;00m; ++dof)\n",
      "      \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m comp = \u001b[34m0\u001b[39;49;00m; comp <= \u001b[34m2\u001b[39;49;00m; ++comp)\n",
      "        D[\u001b[34m42\u001b[39;49;00m * el + \u001b[34m3\u001b[39;49;00m * dof + comp] = eps[el] * E[\u001b[34m42\u001b[39;49;00m * el + \u001b[34m3\u001b[39;49;00m * dof + comp];\n",
      "}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "mknl = knl.copy()\n",
    "evt, _ = mknl(queue, eps=eps, E=E)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Changing the layout\n",
    "\n",
    "`E` and `D` are currently laid out as AoS. What if I want SoA?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[36m#\u001b[39;49;00m\u001b[36mdefine lid(N) ((int) get_local_id(N))\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
      "\u001b[36m#\u001b[39;49;00m\u001b[36mdefine gid(N) ((int) get_group_id(N))\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
      "\u001b[36m#\u001b[39;49;00m\u001b[36mif __OPENCL_C_VERSION__ < 120\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
      "\u001b[36m#\u001b[39;49;00m\u001b[36mpragma OPENCL EXTENSION cl_khr_fp64: enable\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
      "\u001b[36m#\u001b[39;49;00m\u001b[36mendif\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
      "\n",
      "__kernel \u001b[36mvoid\u001b[39;49;00m \u001b[32m__attribute__\u001b[39;49;00m ((reqd_work_group_size(\u001b[34m1\u001b[39;49;00m, \u001b[34m1\u001b[39;49;00m, \u001b[34m1\u001b[39;49;00m))) loopy_kernel(__global \u001b[36mdouble\u001b[39;49;00m *__restrict__ D, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ E_s0, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ E_s1, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ E_s2, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ eps, \u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m nels)\n",
      "{\n",
      "  \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m el = \u001b[34m0\u001b[39;49;00m; el <= -\u001b[34m1\u001b[39;49;00m + nels; ++el)\n",
      "    \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m dof = \u001b[34m0\u001b[39;49;00m; dof <= \u001b[34m13\u001b[39;49;00m; ++dof)\n",
      "    {\n",
      "      D[\u001b[34m42\u001b[39;49;00m * el + \u001b[34m3\u001b[39;49;00m * dof] = eps[el] * E_s0[\u001b[34m14\u001b[39;49;00m * el + dof];\n",
      "      D[\u001b[34m1\u001b[39;49;00m + \u001b[34m42\u001b[39;49;00m * el + \u001b[34m3\u001b[39;49;00m * dof] = eps[el] * E_s1[\u001b[34m14\u001b[39;49;00m * el + dof];\n",
      "      D[\u001b[34m2\u001b[39;49;00m + \u001b[34m42\u001b[39;49;00m * el + \u001b[34m3\u001b[39;49;00m * dof] = eps[el] * E_s2[\u001b[34m14\u001b[39;49;00m * el + dof];\n",
      "    }\n",
      "}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "mknl = knl\n",
    "\n",
    "mknl  = lp.tag_array_axes(mknl, \"E\", \"c,c,sep\")\n",
    "mknl = lp.tag_inames(mknl, {\"comp\": \"unr\"})\n",
    "mknl = lp.prioritize_loops(mknl, \"el,dof,comp\")\n",
    "\n",
    "# change data format of E\n",
    "copy_knl = lp.make_copy_kernel(\"c,c,sep\")\n",
    "copy_knl = lp.fix_parameters(copy_knl, n2=3)\n",
    "evt, E_new = copy_knl(queue, input=E)\n",
    "\n",
    "evt, _ = mknl(queue, eps=eps, E=E_new)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "May want to add padding (demo).\n",
    "\n",
    "---\n",
    "\n",
    "Grouped padding exists as well."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
